Subgrad
计算逐元素减法(Sub)操作的梯度。该算子是 Sub 算子的反向传播(backward pass)部分,支持广播。
\[\text{dx1} = \frac{\partial L}{\partial X1} = \frac{\partial L}{\partial Y} \times 1 = \frac{\partial L}{\partial Y}\]
\[\text{dx2} = \frac{\partial L}{\partial X2} = \frac{\partial L}{\partial Y} \times (-1) = -\frac{\partial L}{\partial Y}\]
其中对于前向操作 \(Y = X1 - X2\),dy 是来自后一层的上游梯度,dx1 和 dx2 分别是对 X1 和 X2 的梯度。
- 输入:
dy - 上游梯度数据地址(即 \(\frac{\partial L}{\partial Y}\))。
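params - 参数打包成 long long 数组,各元素按以下顺序存放:
params[0]: dx1_dims - 前向传播时第一个输入 x1 的维度信息数组(int*)。
params[1]: dx2_dims - 前向传播时第二个输入 x2 的维度信息数组(int*)。
params[2]: dy_dims - 上游梯度 dy 的维度信息数组(int*)。
params[3]: num_dims - 维度数(int)。
params[4]: temp_space - 临时空间地址。
core_mask - 核掩码(int),仅共享存储版本需要;作为独立的函数参数传入,不放在 params 数组中。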
- 输出:
dx1 - 对 x1 的梯度数据地址。
dx2 - 对 x2 的梯度数据地址。
- 支持平台:
FT78NE、MT7004
备注
MT7004 支持fp16, fp32
FT78NE 支持fp32
当输入张量被广播时,算子会自动处理广播维度的梯度累加
共享存储版本:
-
void hp_sub_grad_s(half *dy, half *dx1, half *dx2, long long *params, int core_mask)
-
void fp_sub_grad_s(float *dy, float *dx1, float *dx2, long long *params, int core_mask)
C调用示例:
1//MT7004示例
2#include <stdio.h>
3#include <subgrad.h>
4
5int main(int argc, char* argv[]) {
6 float *dy = (float *)0x81000000;
7 float *dx1 = (float *)0x82000000;
8 float *dx2 = (float *)0x83000000;
9 float *checkdx1 = (float *)0x84000000;
10 float *checkdx2 = (float *)0x85000000;
11 int *tempsapce = (int *)0x86000000;
12
13 srand(seed++);
14 int i;
15
16 // broadcast case: x2 is broadcast along dimension 0
17 int dx1_dims[] = {16, 16, 64}; // 16x16x64
18 int dx2_dims[] = {1, 16, 64}; // 1x16x64
19 int dy_dims[] = {16, 16, 64}; // 16x16x64
20 int num_dims = 3;
21
22 int dx1_num = get_total_elements(num_dims, dx1_dims);
23 int dx2_num = get_total_elements(num_dims, dx2_dims);
24 int dy_num = get_total_elements(num_dims, dy_dims);
25
26
27 for (i = 0; i < dy_num; ++i) {
28 dy[i] = (float)(rand() % 100) / 10.0f;
29 }
30
31 long long params[17];
32 params[0] = (unsigned long long)dx1_dims;
33 params[1] = (unsigned long long)dx2_dims;
34 params[2] = (unsigned long long)dy_dims;
35 params[3] = (unsigned long long)num_dims;
36 params[4] = (unsigned long long)tempsapce;
37 int core_mask = 0x0f;
38 fp_sub_grad_s(dy, dx1, dx2, params, core_mask);
39 return 0;
40}
私有存储版本:
-
void hp_subgrad_p(half *dy, long long *params, half *dx1, half *dx2)
-
void fp_subgrad_p(float *dy, long long *params, float *dx1, float *dx2)
C调用示例:
1//MT7004示例
2#include <stdio.h>
3#include <subgrad.h>
4
5int main(int argc, char* argv[]) {
6 float *dy = (float *)0x10010000;
7 float *dx1 = (float *)0x10020000;
8 float *dx2 = (float *)0x10030000;
9 float *checkdx1 = (float *)0x10040000;
10 float *checkdx2 = (float *)0x10050000;
11 int *tempsapce = (int *)0x10060000;
12
13 srand(seed++);
14 int i;
15
16 // broadcast case: x2 is broadcast along dimension 0
17 int dx1_dims[] = {16, 16, 64}; // 16x16x64
18 int dx2_dims[] = {1, 16, 64}; // 1x16x64
19 int dy_dims[] = {16, 16, 64}; // 16x16x64
20 int num_dims = 3;
21
22 int dx1_num = get_total_elements(num_dims, dx1_dims);
23 int dx2_num = get_total_elements(num_dims, dx2_dims);
24 int dy_num = get_total_elements(num_dims, dy_dims);
25
26
27 for (i = 0; i < dy_num; ++i) {
28 dy[i] = (float)(rand() % 100) / 10.0f;
29 }
30
31 long long params[17];
32 params[0] = (unsigned long long)dx1_dims;
33 params[1] = (unsigned long long)dx2_dims;
34 params[2] = (unsigned long long)dy_dims;
35 params[3] = (unsigned long long)num_dims;
36 params[4] = (unsigned long long)tempsapce;
37
38 fp_subgrad_p(dy, params, dx1, dx2);
39 return 0;
40}